{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# *OPTIONAL* Demo: Reactive plot with multiple regressions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import bqplot.pyplot as plt\n",
    "from ipywidgets import VBox, FloatSlider"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Non-parametric regression"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let us define the random variables $X$ and $Y$ by\n",
    "$$\n",
    "f(x) = x \\frac{1 + x}{1 + x^2}, \\qquad X \\sim \\mathcal{N}(0, 1), \\quad Y = f(X) + \\varepsilon,\n",
    "$$\n",
    "where $\\varepsilon \\sim \\mathcal{N}(0, 1/4)$ is independent of X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "def f(x):\n",
    "    return (x + x ** 2) / (1 + x ** 2)\n",
    "\n",
    "n = 1000\n",
    "sigma = 0.5\n",
    "X = np.random.randn(n)\n",
    "Y = f(X) + sigma * np.random.randn(n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5b354a70e0c84fbcb6018b7668e27cbf",
       "version_major": "2",
       "version_minor": "0"
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(title='Joint sample, and Conditional expecation for (X, Y)')\n",
    "plt.scatter(X, Y, alpha=0.3)\n",
    "mesh = np.linspace(-4, 4, 201)\n",
    "plt.plot(mesh, f(mesh), linewidth=3)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Non-parametric regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "def reg_non_param(x, bdwidth, x_sample, y_sample):\n",
    "    \"\"\"Values of the non-parametric regression of Y wrt X using a Gaussian kernel.\"\"\"\n",
    "    def kern(u, x):\n",
    "        \"\"\"Gaussian kernel function\"\"\"\n",
    "        return np.exp(-(u[:, np.newaxis] - x) ** 2 / (2 * bdwidth ** 2))\n",
    "\n",
    "    return np.sum(kern(x_sample, x) * y_sample[:, np.newaxis], axis=0) \\\n",
    "        / np.sum(kern(x_sample, x), axis=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plotting non-parametric regressions of $Y$ with respect to $X$ with different values of the bandwidth:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a39c75bfa5ba467cb8c6e93507f81ed9",
       "version_major": "2",
       "version_minor": "0"
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(legend_location='bottom-right')\n",
    "plt.scatter(X, Y, alpha=0.3)\n",
    "plt.plot(mesh, reg_non_param(mesh, 0.1, X, Y), 'b', linewidth=3, labels=['bandwidth 0.1'], display_legend=True)\n",
    "plt.plot(mesh, reg_non_param(mesh, 0.2, X, Y), 'r', linewidth=3, labels=['bandwidth 0.2'], display_legend=True)\n",
    "plt.plot(mesh, reg_non_param(mesh, 0.5, X, Y), 'g', linewidth=3, labels=['bandwidth 0.5'], display_legend=True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4bc3671aa6064f5985f52bb9c37b1a61",
       "version_major": "2",
       "version_minor": "0"
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "figure     = plt.figure(title='Non-parametric regression')\n",
    "scatter    = plt.scatter(X, Y, alpha=0.3, enable_move=True)\n",
    "regression = plt.plot(mesh, np.zeros(mesh.shape), 'b', linewidth=3)\n",
    "slider     = FloatSlider(min=0.05, max=2.0, value=1.0, step=0.05, description='bandwidth')\n",
    "\n",
    "def update(change=None):\n",
    "    regression.y = reg_non_param(regression.x, slider.value, scatter.x, scatter.y)\n",
    "\n",
    "slider.observe(update, names=['value'])\n",
    "scatter.observe(update, names=['x', 'y'])\n",
    "update()\n",
    "VBox([figure, slider])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Multiple regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "def basis(knots, x):\n",
    "    \"\"\"Values of order-1 B-spline basis functions.\"\"\"\n",
    "    nb_knots = len(knots)\n",
    "    diag = np.identity(nb_knots)\n",
    "    res = np.empty((len(x), nb_knots))\n",
    "    for i in range(nb_knots):\n",
    "        res[:, i] = np.interp(x, knots, diag[i])\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "373379e64b134ed1a337de08068e9f7d",
       "version_major": "2",
       "version_minor": "0"
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "basis_len = 10\n",
    "knots = np.linspace(-3.5, 3.5, basis_len)\n",
    "\n",
    "plt.figure(title='Order-0 B-splines')\n",
    "plt.plot(mesh, basis(knots, mesh).T, linewidth=2)\n",
    "plt.ylim(0.0, 2.0)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "def reg_param_coeffs(knots, x_sample, y_sample):\n",
    "    \"\"\"Computes the coefficients of the P-L regression of y_sample wrt. x_sample.\"\"\"\n",
    "    bis = basis(knots, x_sample)\n",
    "    var = bis.T.dot(bis)\n",
    "    covar = y_sample.dot(bis)\n",
    "    return np.linalg.lstsq(var, covar.T)[0]\n",
    "\n",
    "def eval_piecewise_linear(x, knots, coeffs):\n",
    "    \"\"\"Eveluates the piecewise linear function at the specified x for the knots and coeffs.\n",
    "    \"\"\"\n",
    "    return np.interp(x, knots, coeffs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4c4131b9da5b4af6a2343dac51b04072",
       "version_major": "2",
       "version_minor": "0"
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure()\n",
    "plt.scatter(X, Y, alpha=0.3)\n",
    "\n",
    "knots1 = np.linspace(-3.0, 3.0, 10)\n",
    "plt.plot(mesh, eval_piecewise_linear(mesh, knots1, reg_param_coeffs(knots1, X, Y)), 'b', linewidth=3)\n",
    "\n",
    "knots2 = np.linspace(-3.0, 3.0, 20)\n",
    "plt.plot(mesh, np.interp(mesh, knots2, reg_param_coeffs(knots2, X, Y)), 'r', linewidth=3)\n",
    "\n",
    "plt.title('Different collections of knots')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Penalized multiple regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "def second_derivative_on_dirac_basis(knots):\n",
    "    \"\"\"\n",
    "    Computes the coefficients of the second derivative of the basis functions\n",
    "    on the Dirac comb.\n",
    "    \"\"\"\n",
    "    nb_knots = len(knots)\n",
    "    res = np.zeros((nb_knots, nb_knots))\n",
    "    if nb_knots > 1:\n",
    "        res[0, 0] = -1.0 / (knots[1] - knots[0])\n",
    "        res[0, 1] = 1.0 / (knots[1] - knots[0])\n",
    "    for i in range(1, nb_knots - 1):\n",
    "        res[i, i - 1] = (1.0 / (knots[i] - knots[i - 1]))\n",
    "        res[i, i] = -(1.0 / (knots[i] - knots[i - 1]) + 1.0 / (knots[i + 1] - knots[i]))\n",
    "        res[i, i + 1] = 1.0 / (knots[i + 1] - knots[i])\n",
    "    if nb_knots > 1:\n",
    "        res[nb_knots - 1, nb_knots - 2] = 1.0 / (knots[nb_knots - 1] - knots[nb_knots - 2])\n",
    "        res[nb_knots - 1, nb_knots - 1] = -1.0 / (knots[nb_knots - 1] - knots[nb_knots - 2])\n",
    "    return res\n",
    "\n",
    "def dirac_inner_product(knots, coeffs1, coeffs2):\n",
    "    \"\"\"\n",
    "    Equivalent to the finite-difference approximation for the second derivative.\n",
    "    \"\"\"\n",
    "    nb_knots = len(knots) \n",
    "    res = 0.0\n",
    "    for i in range(nb_knots):\n",
    "        res += 0.5 * (coeffs1[i] * coeffs2[i] + coeffs1[i - 1] * coeffs2[i - 1]) / (knots[i] - knots[i - 1])\n",
    "    return res\n",
    "\n",
    "def tikhonov_matrix(knots):\n",
    "    \"\"\"Computes the second-order Tikhonov matrix of the B-splines corresponding to specified knots.\n",
    "    \n",
    "    Note\n",
    "    ----\n",
    "    The specified array of knots must be non-empty and increasingly sorted.\n",
    "    \"\"\"\n",
    "    basis_len = len(knots)\n",
    "    res = np.zeros((basis_len, basis_len))\n",
    "    coeffs_on_dirac_basis = second_derivative_on_dirac_basis(knots)\n",
    "    influence_order = 2\n",
    "    for i in range(basis_len):\n",
    "        min_j = max(0, i - influence_order)\n",
    "        max_j = min(basis_len, i + influence_order + 1)\n",
    "        for j in range(min_j, max_j):\n",
    "            res[i, j] = dirac_inner_product(knots, coeffs_on_dirac_basis[i], coeffs_on_dirac_basis[j])\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": [
    "def penalized_pl_regression(knots, x_sample, y_sample, tikhonov_factor):\n",
    "    \"\"\"Compute the second-order penalized P-L regression of y_sample wrt. x_sample.\n",
    "    \"\"\"\n",
    "    bis = basis(knots, x_sample)\n",
    "    var = (bis.T).dot(bis) / len(x_sample)\n",
    "    covar = y_sample.dot(bis) / len(x_sample)\n",
    "    tikho = tikhonov_matrix(knots)\n",
    "    \n",
    "    return np.linalg.lstsq(var + tikhonov_factor * tikho, covar.T)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b3a6413975f143159f6832471ff305fd",
       "version_major": "2",
       "version_minor": "0"
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(title='Testing multiple values for Tikhonov factor')\n",
    "plt.scatter(X, Y, alpha=0.3)\n",
    "knots = np.linspace(-3.0, 3.0, 25)\n",
    "plt.plot(mesh, eval_piecewise_linear(mesh, knots, penalized_pl_regression(knots, X, Y, 0.01)), 'r', linewidth=3)\n",
    "plt.plot(mesh, eval_piecewise_linear(mesh, knots, penalized_pl_regression(knots, X, Y, 0.1)), 'g', linewidth=3)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "77af689f25e3412483e980467750b099",
       "version_major": "2",
       "version_minor": "0"
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "figure     = plt.figure(title='Non-parametric regression')\n",
    "scatter_s  = plt.scatter(X, Y, alpha=0.3, enable_move=True)\n",
    "spline     = plt.plot(mesh, np.zeros(mesh.shape), 'b', linewidth=3)\n",
    "tikhonov   = FloatSlider(min=0.02, max=1.0, value=0.5, step=0.01, description='Tikhonov')\n",
    "\n",
    "def update_spline(change=None):\n",
    "    spline.y = eval_piecewise_linear(spline.x, knots, \n",
    "        penalized_pl_regression(knots, scatter_s.x, scatter_s.y, tikhonov.value))\n",
    "\n",
    "tikhonov.observe(update_spline, names=['value'])\n",
    "scatter_s.observe(update_spline, names=['x', 'y'])\n",
    "update_spline()\n",
    "VBox([figure, tikhonov])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "widgets-tutorial",
   "language": "python",
   "name": "widgets-tutorial"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "name": "Regression and American Options.ipynb"
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
